/*=========================================================================
Elite College as Engines of Upward Mobility: Evidence from Colombia's Ser Pilo Paga
Authors: Juliana Londoño-Vélez, Catherine Rodriguez, Fabio Sánchez
and Luis Esteban Álvarez-Arango

Creation date: June 6th, 2025
--------------------------------------------------------------------------
Table A1
=========================================================================*/
	
use "${data}/data_RD", clear
keep if icfes_per==20122 |icfes_per==20132 
	
gen puntaje_pro=score_saberpro
gen p50_score_sb11=puntaje_global_s11_final
gen p50_score_pro=score_saberpro
gen p50_mw_9= mw_9
gen p50_ln_wage_9= ln_wage_9
gen std_sb11=(puntaje_global_s11_final-250)/30
gen p50_std_sb11=std_sb11
** duración carrera
tostring semestre1, gen(aux1)
tostring semestre_grado, gen(aux2)
foreach x in aux1 aux2 {
	replace `x'=regexr(`x',"2$","5")
	replace `x'=regexr(`x',"1$","0")
	destring `x', replace
	replace `x'=`x'/10
}
	
* ingreso snies vs presentación PRO
gen grado_snies_pro=(periodo_pro-aux1)*2 if nivel_programa=="Universitaria"
	
* ingreso snies vs grado snies
gen grado_snies_snies=(aux2-aux1)*2 if nivel_programa=="Universitaria"
	
gen p50_grado_snies_pro= grado_snies_pro
gen p50_grado_snies_snies=grado_snies_snies
	
collapse (max) hq_14_pri hq_14_pub lq_14_pub lq_14_pri tiempo_completo matriculados PPE Ingresos_Investigacion TotalGastOpdeInvestig TotalGastOpdeBienestar TotalGastOp (mean) puntaje_global_s11_final std_sb11 puntaje_pro grado_snies_pro grado_snies_snies mw_9 ln_wage_9 (p50) p50_score_sb11 p50_std_sb11 p50_score_pro p50_grado_snies_pro p50_grado_snies_snies p50_mw_9 p50_ln_wage_9 (sum) access_he graduation_exam_8 work_9, by(codigo_ies nombre_ies icfes_per)
	
gen p_graduacion= (graduation_exam_8/access_he)*100
gen p_work_9= (work_9/access_he)*100
drop if codigo_ies==.
	
reshape wide puntaje_global_s11_final std_sb11 puntaje_pro grado_snies_pro grado_snies_snies mw_9 ln_wage_9 p50_score_sb11 p50_std_sb11 p50_score_pro p50_grado_snies_pro p50_grado_snies_snies p50_mw_9 p50_ln_wage_9 access_he graduation_exam_8 work_9 p_graduacion p_work_9, i(codigo_ies tiempo_completo matriculados PPE Ingresos_Investigacion TotalGastOpdeInvestig TotalGastOpdeBienestar TotalGastOp) j(icfes_per)
	
order nombre_ies codigo_ies hq_14_pri hq_14_pub lq_14_pub lq_14_pri
	
rename (puntaje_global_s11_final20122 puntaje_global_s11_final20132) (score_sb11_12 score_sb11_13)
rename (puntaje_pro20122 puntaje_pro20132) (score_pro_12 score_pro_13)
rename (p50_score_sb1120122 p50_score_sb1120132) (p50_score_sb11_12 p50_score_sb11_13)
rename (p50_score_pro20122 p50_score_pro20132) (p50_score_pro_12 p50_score_pro_13)
rename (access_he20122 access_he20132) (access_he_12 access_he_13)
rename (graduation_exam_820122 graduation_exam_820132) (graduation_exam_8_12 graduation_exam_8_13) 
rename (p_graduacion20122 p_graduacion20132) (p_graduacion_8_12 p_graduacion_8_13)
rename (mw_920122 ln_wage_920122 p50_mw_920122 p50_ln_wage_920122 work_920122 p_work_920122) (mw_9_12 ln_wage_9_12 p50_mw_9_12 p50_ln_wage_9_12 work_9_12 p_work_9_12)
rename (mw_920132 ln_wage_920132 p50_mw_920132 p50_ln_wage_920132 work_920132 p_work_920132) (mw_9_13 ln_wage_9_13 p50_mw_9_13 p50_ln_wage_9_13 work_9_13 p_work_9_13)
rename (grado_snies_pro20122 grado_snies_pro20132) (tim_g_snies_pro_12 tim_g_snies_pro_13)
rename (grado_snies_snies20122 grado_snies_snies20132) (tim_gra_snies_snies_12 tim_gra_snies_snies_13)
rename (p50_grado_snies_pro20122 p50_grado_snies_pro20132) (p50_tim_gra_snies_pro_12 p50_tim_gra_snies_pro_13)
rename (p50_grado_snies_snies20122 p50_grado_snies_snies20132) (p50_tim_gra_snies_snies_12 p50_tim_gra_snies_snies_13)
rename (std_sb1120122 std_sb1120132 p50_std_sb1120122 p50_std_sb1120132) (std_sb11_12 std_sb11_13 p50_std_sb11_12 p50_std_sb11_13)
	
tempfile maestra
save `maestra'
	
	
**** Docentes Phd con corte a junio del 2016
	
import excel "${raw}/SNIES Docentes 2014.xlsx", sheet("Docentes 2014") cellrange(A8:X16561) firstrow clear
	
gen docentes_phd=NodeDocentes if Máximoniveldeformacióndeld=="DOCTORADO"
	
collapse (sum) NodeDocentes docentes_phd, by(CódigodelaInstitución IESPADRE Semestre)
collapse (mean) NodeDocentes docentes_phd, by(CódigodelaInstitución IESPADRE)
	
gen por_docentes_phd= (docentes_phd/NodeDocentes)*100
rename CódigodelaInstitución codigo_ies
	
tempfile docentes_2014
save `docentes_2014'
	
	
**** Costo matrícula

import excel "${raw}/Programas - costo matrícula.xlsx", sheet("Programas") firstrow clear
keep if NIVEL_DE_FORMACIÓN=="Universitario"
replace PERIODICIDAD="Semestral" if NÚMERO_PERIODOS_DE_DURACIÓN>=10 & PERIODICIDAD=="Anual" 
	
keep if NIVEL_ACADÉMICO=="Pregrado"
winsor2 COSTO_MATRÍCULA_ESTUD_NUEVOS if SECTOR=="Oficial", replace cuts(0 95)
	
gen precio_smlv= COSTO_MATRÍCULA_ESTUD_NUEVOS/908526
gen p50_precio_smlv=precio_smlv
		
collapse (mean) precio_smlv (p50) p50_precio_smlv, by(CÓDIGO_INSTITUCIÓN CÓDIGO_INSTITUCIÓN_PADRE)
rename CÓDIGO_INSTITUCIÓN codigo_ies
destring codigo_ies, replace
rename (precio_smlv p50_precio_smlv) (precio_smlv1 p50_precio_smlv1)
tempfile precio1
save `precio1'

import excel "${raw}/Valor matrícula Educación Superior y Universidades 2014 a 2017.xlsx", sheet("20142") firstrow case(lower) clear
keep códigoies matrículaalumnonuevo
sort códigoies
drop if matrículaalumnonuevo<100000 
destring códigoies, replace
rename códigoies codigo_ies
rename matrículaalumnonuevo costo_matricula
gen precio_smlv= costo_matricula/908526
gen p50_precio_smlv=precio_smlv
collapse (mean) precio_smlv (p50) p50_precio_smlv, by(codigo_ies)
merge 1:1 codigo_ies using `precio1'
tempfile precio
save `precio'

use "${data}/data_RD", clear
keep if icfes_per>=20121 & icfes_per<=20142 
keep codigo_ies costo_matricula_1_spp costo_matricula_2_spp costo_matricula_3_spp costo_matricula_4_spp costo_matricula_5_spp costo_matricula_6_spp
rename *_spp *
egen costo_matricula=rowmax(costo_matricula_1 costo_matricula_2 costo_matricula_3 costo_matricula_4 costo_matricula_5 costo_matricula_6)
drop costo_matricula_1 costo_matricula_2 costo_matricula_3 costo_matricula_4 costo_matricula_5 costo_matricula_6
gduplicates drop
gen costo_matricula_smlv= costo_matricula/908526
gen p50_costo_matricula=costo_matricula_smlv
collapse (mean) costo_matricula_smlv (p50) p50_costo_matricula, by(codigo_ies)
drop if codigo_ies==3114 | codigo_ies==9107 | codigo_ies==3902 | codigo_ies==9102 | codigo_ies==9105
tempfile costo_matricula
save `costo_matricula'

**
import excel "${raw}\Programas - costo matrícula.xlsx", sheet("Programas") firstrow clear
keep if NIVEL_DE_FORMACIÓN=="Universitario"
replace PERIODICIDAD="Semestral" if NÚMERO_PERIODOS_DE_DURACIÓN>=10 & PERIODICIDAD=="Anual" 
gen num_semestres_prog= NÚMERO_PERIODOS_DE_DURACIÓN if PERIODICIDAD=="Semestral"
replace num_semestres_prog=NÚMERO_PERIODOS_DE_DURACIÓN*2 if PERIODICIDAD=="Anual" 
replace num_semestres_prog=NÚMERO_PERIODOS_DE_DURACIÓN if PERIODICIDAD=="Periodos" 
replace num_semestres_prog=(NÚMERO_PERIODOS_DE_DURACIÓN*2)/6 if PERIODICIDAD=="Bimensual" | PERIODICIDAD=="Bimestral" 
replace num_semestres_prog=(NÚMERO_PERIODOS_DE_DURACIÓN*4)/6 if PERIODICIDAD=="Cuatrimestral" 
replace num_semestres_prog=(NÚMERO_PERIODOS_DE_DURACIÓN)/6 if PERIODICIDAD=="Mensual" 
replace num_semestres_prog=(NÚMERO_PERIODOS_DE_DURACIÓN*3)/6 if PERIODICIDAD=="Trimestral" 
	
keep if NIVEL_ACADÉMICO=="Pregrado"
gen p50_num_semestres_prog= num_semestres_prog
	
collapse (mean) num_semestres_prog (p50) p50_num_semestres_prog, by(CÓDIGO_INSTITUCIÓN CÓDIGO_INSTITUCIÓN_PADRE)
rename CÓDIGO_INSTITUCIÓN codigo_ies
destring codigo_ies, replace
tempfile semestres
save `semestres'
	
**** Acreditación
use "${raw}/IES_acreditadas_2014_2020.dta", clear
egen hq_15_19= rowmax(acreditación_2015 acreditación_2016 acreditación_2017 acreditación_2018 acreditación_2019)
replace hq_15_19=. if acreditación_2014==1
egen hq_14_19=rowmax(acreditación_2014 acreditación_2015 acreditación_2016 acreditación_2017 acreditación_2018 acreditación_2019)
gen lq_15_19=1 if hq_14_19==.
keep codigo_ies acreditación_2014 hq_15_19 lq_15_19 sector
tempfile acreditacion
save `acreditacion'

****************************************************************************
**** CRUCES
	
use `maestra', clear
merge 1:1 codigo_ies using `docentes_2014', nogen keep(1 3)
merge 1:1 codigo_ies using `costo_matricula', nogen keep(1 3)
merge 1:1 codigo_ies using `acreditacion', nogen keep(1 3)
merge 1:1 codigo_ies using `semestres', nogen keep(1 3)
merge 1:1 codigo_ies using `precio', nogen keep(1 3)
	
rename IESPADRE cod_ies_padre
replace precio_smlv=precio_smlv*2
replace p50_precio_smlv=p50_precio_smlv*2
replace precio_smlv=costo_matricula_smlv if hq_14_pri==1 | lq_14_pri==1
replace p50_precio_smlv=p50_costo_matricula if hq_14_pri==1 | lq_14_pri==1
replace costo_matricula_smlv=. if hq_14_pri==1 | lq_14_pri==1
replace p50_costo_matricula=. if hq_14_pri==1 | lq_14_pri==1
order nombre_ies codigo_ies cod_ies_padre sector acreditación_2014 hq_15_19 hq_14_pri hq_14_pub lq_14_pub lq_14_pri
	
gen miss= missing(costo_matricula_smlv)
gen type = .
replace type =1 if hq_14_pri==1
replace type =2 if hq_14_pub==1
replace type =3 if lq_14_pri==1
replace type =4 if lq_14_pub==1
			
label define type 1 "1: Private HEI - HQ 14" 2 "2: Public HEI - HQ 14" 3 "3: Private HEI - LQ 14" 4 "4: Public HEI - LQ 14"
	label values type type

egen mean_sb11= rowmean(score_sb11_12 score_sb11_13)
egen mean_sbpro= rowmean(score_pro_12 score_pro_13)
egen mean_p_graduation= rowmean(p_graduacion_8_12 p_graduacion_8_13)
egen mean_mw= rowmean(mw_9_12 mw_9_13)
egen mean_ln_wage= rowmean(ln_wage_9_12 ln_wage_9_13)
egen mean_p_work= rowmean(p_work_9_12 p_work_9_13)
egen mean_tim_g_snies_pro= rowmean(tim_g_snies_pro_12 tim_g_snies_pro_13)
egen mean_tim_g_snies_snies= rowmean(tim_gra_snies_snies_12 tim_gra_snies_snies_13)
egen mean_std_sb11= rowmean(std_sb11_12 std_sb11_13)
	
egen p50_sb11= rowmean(p50_score_sb11_12 p50_score_sb11_13)
egen p50_sbpro= rowmean(p50_score_pro_12 p50_score_pro_13)
egen p50_p_graduation= rowmean(p_graduacion_8_12 p_graduacion_8_13)
egen p50_mw =rowmean(p50_mw_9_12 p50_mw_9_13)
egen p50_ln_wage= rowmean(p50_ln_wage_9_12 p50_ln_wage_9_13)
egen p50_p_work= rowmean(p_work_9_12 p_work_9_13)
egen p50_tim_g_snies_pro= rowmean(p50_tim_gra_snies_pro_12 p50_tim_gra_snies_pro_13)
egen p50_tim_g_snies_snies= rowmean(p50_tim_gra_snies_snies_12 p50_tim_gra_snies_snies_13)
egen p50_std_sb11= rowmean(p50_std_sb11_12 p50_std_sb11_13)
	
rename por_docentes_phd mean_p_docentes_phd
gen p50_p_docentes_phd=mean_p_docentes_phd
rename num_semestres_prog mean_num_semestres_prog
	
rename costo_matricula_smlv mean_costo_matricula
rename precio_smlv mean_precio_smlv
egen Nest = rsum(access_he_12 access_he_13)
gen N_sedes = 1	
replace cod_ies_padre=codigo_ies if cod_ies_padre==.
gen N_ies = 1 if codigo_ies== cod_ies_padre
	
matrix descriptivas1=J(30,4,.)
	
forvalues j=1/4{
	local i=1
	foreach x in sb11 std_sb11 sbpro p_graduation tim_g_snies_pro tim_g_snies_snies p_docentes_phd num_semestres_prog costo_matricula precio_smlv p_work mw ln_wage{
		sum mean_`x' if type==`j'
		matrix descriptivas1[`i',`j']=r(mean)	
		local i= `i'+1
		
		sum p50_`x' if type==`j', d
		matrix descriptivas1[`i',`j']=r(p50)	
		local i= `i'+1
		
		global vars2 $vars2 `x'
		
	}
	
	sum miss if type==`j', d
	matrix descriptivas1[27,`j']=r(mean)
	sum Nest if type==`j', d
	matrix descriptivas1[28,`j']=r(sum)
	sum N_sedes if type==`j', d
	matrix descriptivas1[29,`j']=r(sum)
	sum N_ies if type==`j', d
	matrix descriptivas1[30,`j']=r(sum)	
}
	
clear
svmat descriptivas1
gen nombre=""
replace nombre="Mean SABER 11 score" in 1
replace nombre="P50 SABER 11 score" in 2
replace nombre="Mean (Standardized) SABER 11 score" in 3
replace nombre="P50 (Standardized) SABER 11 score" in 4		
replace nombre="Mean (Standardized) SABER PRO score" in 5
replace nombre="P50 (Standardized) SABER PRO score" in 6
replace nombre="Mean graduation rate (%)" in 7
replace nombre="P50 graduation rate (%)" in 8
replace nombre="Mean graduation length (in semesters) - access SNIES vs graduation PRO" in 9
replace nombre="P50 graduation length (in semesters) - access SNIES vs graduation PRO" in 10
replace nombre="Mean graduation length (in semesters) - access SNIES vs graduation SNIES" in 11
replace nombre="P50 graduation length (in semesters) - access SNIES vs graduation SNIES" in 12
replace nombre="Mean faculty with PhD (%)" in 13
replace nombre="P50 faculty with PhD (%)" in 14
replace nombre="Mean program length (in semesters)" in 15
replace nombre="P50 program length (in semesters)" in 16
replace nombre="Mean cost per student (in min wages)" in 17
replace nombre="P50 cost per student (in min wages)" in 18	
replace nombre="Mean sticker price of tuition (in min wages)" in 19
replace nombre="P50 sticker price of tuition (in min wages)" in 20
replace nombre="Mean formal employment 9 years after HS (%)" in 21
replace nombre="P50 formal employment 9 years after HS (%)" in 22
replace nombre="Mean earnings 9 years after HS (in min wages, w/0s)" in 23
replace nombre="P50 earnings 9 years after HS (in min wages, w/0s)" in 24
replace nombre="Mean of ln earnings 9 years after HS" in 25
replace nombre="P50 of ln earnings 9 years after HS" in 26
replace nombre="Mean college campuses without tuition info (%)" in 27
replace nombre="N students" in 28
replace nombre="N college campuses" in 29
replace nombre="N colleges" in 30
		
rename (descriptivas11 descriptivas12 descriptivas13 descriptivas14) (hq_14_pri hq_14_pub lq_14_pri lq_14_pub)

keep if nombre=="Mean SABER 11 score" | nombre=="Mean (Standardized) SABER 11 score" | nombre=="Mean (Standardized) SABER PRO score" | nombre=="Mean graduation rate (%)" | nombre=="Mean faculty with PhD (%)" | nombre=="Mean program length (in semesters)" | nombre=="Mean sticker price of tuition (in min wages)" | nombre=="Mean cost per student (in min wages)" | nombre=="N students" | nombre=="N college campuses" | nombre=="N colleges"

order nombre

save "${Intermediate}/Tab_A1.dta", replace
